# -*- coding: utf-8 -*-
# @Time : 2020/11/3 13:45
# File : hello.py

import re
from bs4 import BeautifulSoup
import urllib.request, urllib.error
import xlwt
import sqlite3
import http.cookiejar

def main():
    """Entry point: fetch, parse, and save the Douban Top-250 page."""
    base_url = r'https://movie.douban.com/top250?start=0'
    # Fetch and parse the page data.
    datalist = getDate(base_url)

    # Save the scraped data to an Excel workbook.
    # NOTE(review): datalist is never passed to saveData — presumably the
    # save step should receive it once implemented; confirm the intent.
    save_path = r'./豆瓣电影top250.xls'
    saveData(save_path)

    # Fetch the same page once more via the headered helper.
    askURL(r'https://movie.douban.com/top250?start=0')

# Crawl the pages to scrape.
def getDate(baseurl):
    """Crawl pages starting at *baseurl* and return the parsed entries.

    Currently a stub: always returns an empty list.
    """
    # TODO: fetch each page (e.g. via askURL) and parse the entries here.
    return []


# Load a previously saved cookie jar and replay it on a request.
# NOTE(review): this whole section runs at import time (file + network I/O
# before main() is even called) — consider moving it into a function.
cookieFine = r'../doubancookie.txt'  # NOTE(review): name looks like a typo for "cookieFile"
cookie = http.cookiejar.MozillaCookieJar(cookieFine)
# Keep session/expired cookies too; raises FileNotFoundError if the file is missing.
cookie.load(cookieFine, ignore_discard=True, ignore_expires=True)
#print(cookie)
# Build an opener that sends the loaded cookies with every request.
handler = urllib.request.HTTPCookieProcessor(cookie)
opener = urllib.request.build_opener(handler)

get_url = r'https://movie.douban.com/top250?start=0'
get_request = urllib.request.Request(get_url)
# Bug fix: close the response deterministically instead of leaking the socket.
with opener.open(get_request) as get_response:
    print(get_response.read().decode())

# Fetch the HTML content of a given URL.
def askURL(url):
    """Fetch *url* with a browser User-Agent and return the page HTML.

    Returns an empty string when the request fails; HTTP/URL errors are
    printed rather than raised.
    """
    # Spoof a browser User-Agent so the server does not reject the crawler.
    head = {'User-Agent': "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.129 Safari/537.36"}
    request = urllib.request.Request(url, headers=head)
    html = ''
    try:
        # Close the response promptly instead of leaking the connection.
        with urllib.request.urlopen(request) as response:
            html = response.read().decode('utf-8')
        print(html)
    except urllib.error.URLError as e:
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, 'reason'):
            print(e.reason)
    # Bug fix: the fetched HTML was decoded but never returned, so callers
    # (e.g. getDate) had no way to use it.
    return html

# Persist the scraped data.
def saveData(savepath):
    """Write the scraped movie data to *savepath* (not yet implemented)."""
    # TODO: create the workbook with xlwt and write the rows here.


# Run the scraper only when executed as a script, not on import.
# NOTE(review): the cookie/request statements above still execute at import
# time regardless of this guard.
if __name__ == "__main__":
    main()


